from wordsegment import load,segment
import numpy as np
import matplotlib.pyplot as plt
from scipy import spatial
from scipy.spatial import distance
import csv
from sklearn.manifold import TSNE
# Word -> vector lookup built from the GloVe text file: on every line the
# first whitespace-separated field is the token and the remaining fields are
# parsed as float32 vector components.
dict_emb = {}
with open('E:\\glove.6B.50d.txt', 'r', encoding='utf-8') as glove_file:
    tokenised_rows = (row.split() for row in glove_file)
    dict_emb.update(
        (fields[0], np.asarray(fields[1:], "float32"))
        for fields in tokenised_rows
    )

# wordsegment's load() is called once here, before segment() is used further
# down. NOTE(review): presumably this initialises the data segment() relies
# on -- confirm against the wordsegment documentation.
load()

# Candidate hashtag lists. `ht` is assigned six times in a row and every
# assignment replaces the previous one, so only the LAST list is what the
# segmentation loop below iterates over. The earlier lists have no effect at
# runtime as written; to run on one of them, move it to the end (or comment
# out the assignments after it).

# List 1 (overwritten by the next assignment).
ht=['holidayvibes','gamenight','happyholidays','carsofcommunity','fallguysmoments','interiordesign','homecooked','veteransday','youwantmore','interiordesign','coldweather','wildanimals','mycostume','meleaving','myprofilepicture','catchphrases','watchmegrow','holidaycrafts','growupwithme','clingypet','happyhanukkah','lunarnewyear',
       'tabletop','comfortfood','selfimprovement','affirmations','perfectmatch','givingseason','holidaycountdown','bakingseason','holidaymusic','familyimpression','inkdrawing','WeekendVibes','recordsday','productivity','smallbusiness'
       ,'falldiy','whenwewereyounger','yellow','ComingOfAge','artchristmas','gaminglife','gamingsetup','hellowinter','planttiktok','housetour','neonshadow','homeoffice','raisedby','makeitvogue','foodtiktok','valentinesday','yougotthis','stemlife','makeitvogue']
# List 2 (overwritten by the next assignment).
ht=['workinprogress', 'haveseen', 'outfitoftheday', 'bekind', 'personalfinance', 'cozyathome', 'RoomTour', 'theatrekids', 'ImAGhost', 'holiday', 'halloweenlook', 'happyhalloween', 'welldone', 'motivationmonday', 'thinkabout', 'nonuancenovember', 'ourtype', 'fanedit', 'needtoknow', 'clean', 'graphicdesign', 'readysetshop', 'holidaysourway', 'onlinedating', 'myhobby', 'food', 'whereilive', 'myrecommendation', 'worldseries', 'animation', 'cocinando', 'easydiy', 'diceroll', 'rnbvibes', 'festivefashion', 'holidaydecor', 'nbadraft', 'halloweenishere', 'christmas', 'howbizarre', 'givingthanks', 'holidayvibes', 'gamenight', 'happyholidays', 'cars', 'fallguysmoments', 'interiordesign', 'homecook', 'veteransday', 'youwantmore', 'coldweather', 'wildanimals', 'mycostume', 'meleaving', 'myprofilepicture', 'catchphrases', 'watchmegrow', 'holidaycrafts', 'growupwithme', 'pet', 'happyhanukkah', 'lunarnewyear', 'tabletop', 'comfortfood', 'selfimprovement', 'affirmations', 'perfectmatch', 'givingseason', 'holidaycountdown', 'bakingseason', 'holidaymusic', 'familyimpression', 'inkdrawing', 'WeekendVibes', 'recordsday', 'productivity', 'smallbusiness', 'falldiy', 'whenwewereyounger', 'yellow', 'artchristmas', 'gaminglife', 'gamingsetup', 'hellowinter', 'plant', 'housetour', 'neonshadow', 'homeoffice', 'raisedby', 'makeitvogue', 'food', 'valentinesday', 'yougotthis', 'stemlife']
# List 3, spanning two source lines (overwritten by the next assignment).
ht=['falldiy', 'gaminglife', 'interiordesign', 'food', 'worldseries', 'halloweenishere', 'meleaving', 'yellow', 'yougotthis', 'mycostume', 'animation', 'youwantmore', 'halloweenlook', 'myprofilepicture', 'nativefamily', 'welldone', 'happyhalloween', 'OhNo', 'holiday', 'rnbvibes', 'cars', 'homeoffice', 'myrecommendation', 'fallfashion', 'familyimpression', 'myhobby', 'neonshadow', 'fanedit', 'stemlife', 'motivationmonday', 'workinprogress', 'veteransday', 'holidaycountdown', 'whereilive', 'bekind', 'onhold', 'diwali', 'WeekendVibes', 'bakingseason', 'RoomTour', 'food', 'growupwithme', 'holidaysourway', 'tabletop', 'nbadraft', 'recordsday', 'ourtype', 'nonuancenovember', 'artchristmas', 'gamingsetup', 'theatrekids', 'needtoknow', 'whenwewereyounger', 'graphicdesign', 'inkdrawing', 'fallguysmoments', 'familyrecipe', 'onlinedating', 'watchmegrow', 'givingthanks', 'readysetshop', 'diceroll', 'smallbusiness', 'holidayvibes', 'coldweather', 'givingseason', 'happyholidays', 'homecook', 'easydiy', 'wildanimals', 'holidaymusic', 'howbizarre', 'selfimprovement', 'ImAGhost', 'personalfinance', 'housetour', 'thinkingabout', 'holidaycrafts', 'happyhanukkah', 'RatatouilleMusical', 'catchphrases', 'outfitoftheday', 'comfortfood', 'holidaydecor', 'raisedby', 'goodmorning', 'makeitvogue', 'festivefashion', 'gamenight', 'haventseen', 'plant', 'productivity', 'lovestory', 'wishlist', 'hellowinter', 'Year', 'nbaisback', 'perfectgifts', 'holidaytreats', 'wrappinggifts', 'feliznavidad', 'timewarpjump', 'christmas', 'MyHaul', 'cozyathome', 'winterfit', 'withouttellingme', 'hyperfixated', 'whatilearned', 'joedizzle', 'WordsOfWisdom', 'bye', 'rareaesthetic', 'welcome', 'dailyvlog', 'easyrecipe', 'mystyle', 'myroutine', 'problemstop', 'gamergoals', 'projectcar', 'homemade', 'inlove', 'GreenScreenScan', 'tortillatrend', 'NFLplayoffs', 'FitnessRoutine', 'IsThisAvailable', 'wee', 'weirdpets', 'money', 'healthycooking', 'nhlfaceoff', 'groupchat', 'winterfashion', 'skincare', 'zodiacsign', 
'homeimprovement', 'seashanty', 'clean', 'visionboard', 'affirmations', 'mlkday', 'joblife', 'foodie', 'timewarpwaterfall', 'plantparent', 'WinterMagic', 'notaperfectperson', 'tutorial', 'cocinando', 'OlympicsCountdown', 'couplethings', 'clingypet', 'meditation', 'winterbeautytips', 'diy', 'typing', 'roundofapplause', 'feelinggood', 'RoyalRumble', 'emophase', 'favoriteslippers', 'wintersports', 'makeblackhistory', 'relationshipstorytime', 'albumcover', 'stepbystep', 'fetapasta', 'womeninsports', 'healthyheart', 'imbusyrightnow', 'beautyhacks', 'tailgate', 'puppybowl', 'superbowllv', 'valentinesdiy', 'melaninmagic', 'coversforlovers', 'kissyourpet', 'womeninstem', 'lunarnewyear', 'perfectmatch', 'galentinesday', 'valentinesday', 'loveyourinsecurities', 'fashionmonth', 'blackcreatives', 'colddays', 'mifamilia', 'careeradvice', 'stopasianhate', 'perfectdrink', 'snowstorm', 'carhacks', 'homeproject', 'blackandproud', 'dramaticmoments', 'bakedoats', 'homecook', 'laughingduet', 'yoga', 'somethingyoulearned', 'upcycling', 'fantheory', 'fitness', 'gaming', 'seitan', 'glasspainting', 'whenwomenwin', 'science', 'thriftflip', 'Lifestyle', 'wildlifeday', 'dayandnight', 'ontherunway', 'food', 'homediy', 'nbaallstar']
# List 4 (overwritten by the next assignment).
ht=['cancelthenoise', 'itwasntme', 'beatsdaisychallenge', 'monclerbubbleup', 'airpodsjump', 'inmyaejeans', 'thisisbliss', 'gotmilkchallenge', 'expressieyourself', 'dopacsun', 'strictlycurls', 'hereforrmhc', 'boseallout', 'moodflip', 'whatsyourpower', 'asosfashunweek', 'letsfaceit', 'showmeyourwalk', 'scoobdance', 'merrybossmas']
# List 5 (overwritten by the next assignment).
ht=['videosnap', 'cancelthenoise', 'itwasntme', 'beatsdaisy', 'monclerbubbleup', 'airpodsjump', 'inmyaejeans', 'thisisbliss', 'gotmilk', 'expressieyourself', 'upthebeat', 'dopacsun', 'chemicalhearts', 'perfectasiam', 'strictlycurls', 'hereforrmhc', 'boseallout', 'moodflip', 'readysetgo', 'whatsyourpower', 'asosfashunweek', 'letsfaceit', 'showmeyourwalk', 'scoobdance', 'merrybossmas', 'thesplashdance', 'micellarrewind', 'unwrapthedeals', 'showupshowoff', 'heinzhalloween', 'makeitvogue', 'katespadenyhappydance', 'getcrocd', 'goforthehandful', 'handwash', 'calistar', 'morehappydenimdance']

# List 6 -- the final assignment, i.e. the list that is actually processed.
ht=['handwash', 'videosnap', 'jump', 'letsfaceit', 'itwasntme', 'cancelthenoise', 'happydance', 'goforthehandful', 'beatsdaisy', 'perfectasiam', 'get', 'expressyourself', 'upthebeat', 'allout', 'readysetgo', 'bubbleup', 'showupshowoff', 'dance', 'fashionweek', 'thesplashdance', 'whatsyourpower', 'morehappydenimdance', 'thisis', 'unwrapthedeals', 'chemicalhearts', 'moodflip', 'herefor', 'halloween', 'gotmilk', 'do', 'calistarchallenge', 'merry', 'rewind', 'makeitvogue', 'strictlycurls', 'inmyjeans', 'showmeyourwalk']


def _mean_pairwise_distance(vectors):
    """Return the mean Euclidean distance over all unordered pairs of vectors.

    Args:
        vectors: list of equal-length 1-D numeric vectors.

    Returns:
        The sum of ``distance.euclidean`` over every pair ``(i, j)`` with
        ``i < j`` divided by the number of pairs, or ``0`` when fewer than
        two vectors are given (no pair exists, so there is nothing to
        average and no division is attempted).
    """
    pair_count = len(vectors) * (len(vectors) - 1) / 2
    if pair_count == 0:
        # Zero or one vector: the original guard only covered the one-vector
        # case and raised ZeroDivisionError when no word was in vocabulary.
        return 0
    total = 0
    for i, left in enumerate(vectors):
        for right in vectors[i + 1:]:
            total += distance.euclidean(left, right)
    return total / pair_count


hts = {}  # hashtag -> list of words produced by segment()
emb = {}  # hashtag -> averaged word-embedding vector

# Width of the embedding vectors, read from whichever entry comes first so
# the script does not depend on one particular token being in the vocabulary.
emb_dim = len(next(iter(dict_emb.values())))

# Writes one tab-separated row per hashtag: <hashtag> <mean pairwise distance>.
with open('D:\\Work\\kusuri\\wordseg.tsv', 'w', newline='\n', encoding='utf-8') as fout:
    writer = csv.writer(fout, delimiter='\t')

    for h in ht:
        words = segment(h)
        hts[h] = words

        known_vectors = []  # embeddings of the segments found in dict_emb
        temb = np.zeros(emb_dim)
        for word in words:
            vec = dict_emb.get(word)
            if vec is None:
                print(word, 'not in dict')
                continue
            known_vectors.append(vec)
            # Accumulate the sum (swap in np.maximum(temb, vec) here for
            # element-wise max pooling instead of averaging).
            temb = np.add(temb, vec)
        # NOTE(review): the divisor is the total number of segments, so
        # out-of-vocabulary words count as zero vectors in the average.
        # Kept as in the original; confirm this is the intended weighting.
        temb = np.divide(temb, len(words))
        emb[h] = temb

        # Despite the historical variable name "cossim", the score is a mean
        # Euclidean distance between the in-vocabulary segments.
        mean_dist = _mean_pairwise_distance(known_vectors)
        writer.writerow([h, mean_dist])

# tsne = TSNE(n_components=2, random_state=0)
# x=emb.keys()
# vecotrs=[emb[h] for h in emb.keys()]
# Y=tsne.fit_transform(vecotrs)
# plt.scatter(Y[:,0],Y[:,1])
# for label, i,j in zip(x,Y[:,0],Y[:,1]):
#     plt.annotate(label,xy=(i,j),xytext=(0,0),textcoords='offset points')
# plt.show()